##---------------------------------------------REMOVE ALL OBJECTS FROM MEMORY------------------------------------------##
rm(list=ls()) 
## --------------------------------------------- SET WORKING DIRECTORY ------------------------------------------##
setwd("~/WP1")
.libPaths( c( .libPaths(), "~/R/packages/3.1.0") )
## --------------------------------------------- INSTALL AND LOAD PACKAGES --------------------------------------------##
install.packages("car", lib="~/R/packages/3.2.1")
library(car, lib.loc="~/R/packages/3.2.1")
install.packages("reshape", lib="~/R/packages/3.2.1")
library(reshape, lib.loc="~/R/packages/3.2.1")
install.packages("plyr", lib="~/R/packages/3.2.1")
library(plyr, lib.loc="~/R/packages/3.2.1")
install.packages("arm", lib="~/R/packages/3.2.1")
library(arm)
install.packages("mice", lib="~/R/packages/3.2.1")
library(mice)
install.packages("ggplot2", lib="~/R/packages/3.2.1")
library(ggplot2)
install.packages("grid", lib="~/R/packages/3.2.1")
library(grid)
install.packages("RColorBrewer", lib="~/R/packages/3.2.1")
library(RColorBrewer)
library(foreign)
## --------------------------------------------- READING IN DATA FROM A FILE ------------------------------------------##
load("~/WP1/cache/bcs2004_WP1_Imputed.RData")

## --------------------------------------------- EXTRACT RESULTS FROM IMPUTATION ------------------------------------------##
# Model 1 
fit1 <- with(imp, lm(logsalary34 ~ 1, subset=(working=="1"& alevel=="1")   ))                                 
est1<-pool(fit1)
summary(est1)
pool.r.squared(fit1, adjusted = TRUE)
tab1<-round(summary(pool(fit1)),8)
tab1

# Model 2 
fit2 <- with(imp, lm(logsalary34 ~ I(bd7sex=="Female") + married + children + parttime +  I(sc34=="II Managerial-technical")
+ I(sc34=="3.1 Skilled Non-Manual") + I(sc34=="3.2 Skilled Manual")
+ I(sc34=="IV Partly Skilled") + I(sc34=="V Unskilled") + I(sc34=="Others")+I(residence=="East Midlands") 
+ I(residence=="East of England") + I(residence=="North East") + I(residence=="North West") 
+ I(residence=="South East") + I(residence=="South West") + I(residence=="West Midlands") 
+ I(residence=="Yorkshire and The Humber") + I(residence=="Scotland")+ I(residence=="Wales"), 
subset=(working=="1"& alevel=="1") ))
est2<-pool(fit2)
summary(est2)
pool.r.squared(fit2, adjusted = TRUE)
tab2<-round(summary(pool(fit2)),8)
tab2       
# Model 3 
fit3 <- with(imp, lm(logsalary34 ~ I(bd7sex=="Female") + married + children + parttime +  I(sc34=="II Managerial-technical")
+ I(sc34=="3.1 Skilled Non-Manual") + I(sc34=="3.2 Skilled Manual")
+ I(sc34=="IV Partly Skilled") + I(sc34=="V Unskilled") + I(sc34=="Others")+I(residence=="East Midlands") 
+ I(residence=="East of England") + I(residence=="North East") + I(residence=="North West") 
+ I(residence=="South East") + I(residence=="South West") + I(residence=="West Midlands") 
+ I(residence=="Yorkshire and The Humber") + I(residence=="Scotland")+ I(residence=="Wales")
+ mathematics + science + humanities + socialscience + other, subset=(working=="1"& alevel=="1") )) 
est3<-pool(fit3)
summary(est3)
pool.r.squared(fit3, adjusted = TRUE)
tab3<-round(summary(pool(fit3)),8)
tab3       
# Model 4 - Baseline income ô7316.79
fit4 <- with(imp, lm(logsalary34 ~ I(bd7sex=="Female") + married + children + parttime +  I(residence=="East Midlands") 
+ I(residence=="East of England") + I(residence=="North East") + I(residence=="North West") 
+ I(residence=="South East") + I(residence=="South West") + I(residence=="West Midlands") 
+ I(residence=="Yorkshire and The Humber") + I(residence=="Scotland")+ I(residence=="Wales")
+ I(sc34=="II Managerial-technical")+ I(sc34=="3.1 Skilled Non-Manual") + I(sc34=="3.2 Skilled Manual")
+ I(sc34=="IV Partly Skilled") + I(sc34=="V Unskilled") + I(sc34=="Others")+ degree + NVQ + prof 
+ highereddip + mathematics + science + humanities + socialscience + other +  z_b10maths + z_b10read , 
subset=(working=="1"& alevel=="1")    ))
est4<-pool(fit4)
summary(est4)
pool.r.squared(fit4, adjusted = TRUE)
tab4<-round(summary(pool(fit4)),8)
tab4       
       
# Model 5 
fit5 <- with(imp, lm(logsalary34 ~ I(bd7sex=="Female") + married + children + parttime +  I(residence=="East Midlands") 
+ I(residence=="East of England") + I(residence=="North East") + I(residence=="North West") 
+ I(residence=="South East") + I(residence=="South West") + I(residence=="West Midlands") 
+ I(residence=="Yorkshire and The Humber") + I(residence=="Scotland")+ I(residence=="Wales")
+ I(sc34=="II Managerial-technical")+ I(sc34=="3.1 Skilled Non-Manual") + I(sc34=="3.2 Skilled Manual")
+ I(sc34=="IV Partly Skilled") + I(sc34=="V Unskilled") + I(sc34=="Others")+ degree + NVQ + prof 
+ highereddip + mathematics + science + humanities + socialscience + other + z_workexp + z_workexpsq
+ z_tenure + z_unemp + z_b10maths + z_b10read ,subset=(working=="1"& alevel=="1")    ))                                 
est5<-pool(fit5)
summary(est5)
pool.r.squared(fit5, adjusted = TRUE)
tab5<-round(summary(pool(fit5)),8)
tab5

# Model 6 

fit6 <- with(imp, lm(logsalary34 ~ I(bd7sex=="Female") + married + children + parttime + I(residence=="East Midlands") 
+ I(residence=="East of England") + I(residence=="North East") + I(residence=="North West") 
+ I(residence=="South East") + I(residence=="South West") + I(residence=="West Midlands") 
+ I(residence=="Yorkshire and The Humber") + I(residence=="Scotland")+ I(residence=="Wales")
+ I(parentsc=="II Managerial-technical")+ I(parentsc=="3.1 Skilled Non-Manual") + I(parentsc=="3.2 Skilled Manual")
+ I(parentsc=="IV Partly Skilled") + I(parentsc=="V Unskilled")+ degree + NVQ + prof 
+ highereddip + mathematics + science + humanities + socialscience + other + z_workexp + z_workexpsq
+ z_tenure + z_unemp + z_b10maths + z_b10read + z_maths_olevel + z_english_olevel + z_mother_edfinish 
+ z_father_edfinish + mother_employment + school + schoolmix + post18education + profocc_asp, 
subset=(working=="1"& alevel=="1")   ))                                 
est6<-pool(fit6)
summary(est6)
pool.r.squared(fit6, adjusted = TRUE)
tab6<-round(summary(pool(fit6)),8)
tab6

##----------------------------------------- GRAPHING REGRESSION OUTPUTS -----------------------------------------## 

# Coefficient plot [1] Model 5
coef.vect <-est5$qbar[2:36]
sd.vect <-tab5[,2]
sd.vect <-sd.vect[2:36]
coefs<-100*(exp(coef.vect)-1)
sds<-100*(exp(sd.vect)-1)
# Generate lower and upper bounds
coef.min <-coefs-(1.96*sds)
coef.max <-coefs+(1.96*sds)
longnames <-c("Female","Married","Children","Part time","Residence: East Midlands","Residence:East of England","Residence:North East",
              "Residence:North West","Residence:South East","Residence:South West",
              "Residence:West Midlands","Residence:Yorkshire & Humber","Residence:Scotland","Residence:Wales",
			  "SC: II Mangerial-technical","SC: 3.1 Skilled Non-Manual","SC: 3.2 Skilled Manual",
              "SC: IV Partly Skilled","SC: V Unskilled","SC: Others","Degree",
              "NVQ Level 4/5","Professional Qualification","HE diploma","Mathematics & Computing A level"
              ,"Science A level","Humanities A level","Social science A level","Other A level","Work Experience(months)",
              "Work Experience Squared(Months)","Current Job Tenure(Months)","Unemployment(Months)","Age 10 Maths Score","Age 10 Reading Score")
			  
# Create data frame
d <-data.frame(coefs,sds,coef.min, coef.max, longnames)

# Order parameter names
d$longnames.1 <- as.character(d$longnames)
d$longnames <- factor(d$longnames.1, levels=rev(unique(d$longnames.1)))

# Generate relevant shades
palette <- brewer.pal("Greys", n=9)
color.background = palette[1]
color.grid.major = palette[3]
color.axis.text = palette[6]
color.axis.title = palette[7]
color.title = palette[9]

# Plot - Render in Mac OSX if possible (but set dpi high in Windows if not)
k <-ggplot() + 
  geom_pointrange(data=d, mapping=aes(x=d$longnames,y=d$coefs, ymin=d$coef.min, ymax=d$coef.max), position="identity", width=0.5, size=1, color="black") + 
theme_bw() +
  # Set the entire chart region to a light gray color
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_blank(), axis.line=element_line(), axis.line.y=element_blank()) +
  # Format the grid
  theme(panel.grid.major=element_line(colour=color.background,size=.75)) +
  theme(axis.ticks=element_blank()) +
  theme(axis.line.x =element_line(colour="#535353", size=.75))+
  # Dispose of the legend
  theme(legend.position="none") +
  # Set title and axis labels, and format these and tick marks
  #ggtitle("Regression Estimates") + 
  theme(plot.title = element_text(size = 20, hjust=-.8, vjust=2.6, face="bold"))+
  xlab("") +
  ylab("Percentage change in income") +
  theme(axis.text.x=element_text(size=11,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=11,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=11,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=11,colour="#535353",face="bold",vjust=-.5)) +
  # Big bold line at y=0
  geom_hline(yintercept=0,size=1.2, alpha=0.2,colour="#535353", linetype="twodash")+
  # Plot margins 
  theme(plot.margin = unit(c(1, 1, .5, .7), "cm"))
  k + coord_flip()   
  ggsave("coefplot_model5.png", dpi=1000, width=10, height=10)
  
 # Coefficient plot [2] Model 6
coef.vect <-est6$qbar[2:44]
sd.vect <-tab6[,2]
sd.vect <-sd.vect[2:44]
coefs<-100*(exp(coef.vect)-1)
sds<-100*(exp(sd.vect)-1)
# Generate lower and upper bounds
coef.min <-coefs-(1.96*sds)
coef.max <-coefs+(1.96*sds)
longnames <-c("Female","Married","Children","Part time","Residence: East Midlands","Residence:East of England","Residence:North East",
              "Residence:North West","Residence:South East","Residence:South West",
              "Residence:West Midlands","Residence:Yorkshire & Humber","Residence:Scotland","Residence:Wales",
			  "PSC: II Mangerial-technical","PSC: 3.1 Skilled Non-Manual","PSC: 3.2 Skilled Manual",
              "PSC: IV Partly Skilled","PSC: V Unskilled","Degree",
              "NVQ Level 4/5","Professional Qualification","HE diploma","Mathematics & Computing A level"
              ,"Science A level","Humanities A level","Social science A level","Other A level","Work Experience(months)",
              "Work Experience Squared(Months)","Current Job Tenure(Months)","Unemployment(Months)","Age 10 Maths Score","Age 10 Reading Score",
			  "Maths O-level", "English O-level", "Age Mother finished education", "Age father finished education", "Mother employed when CM 16?",
			  "Attended a private school", "Attended a single-sex school", "CM had post 18 education aspirations", "CM had professional job aspirations")
			  
# Create data frame
d <-data.frame(coefs,sds,coef.min, coef.max, longnames)

# Order parameter names
d$longnames.1 <- as.character(d$longnames)
d$longnames <- factor(d$longnames.1, levels=rev(unique(d$longnames.1)))

# Generate relevant shades
palette <- brewer.pal("Greys", n=9)
color.background = palette[1]
color.grid.major = palette[3]
color.axis.text = palette[6]
color.axis.title = palette[7]
color.title = palette[9]

# Plot - Render in Mac OSX if possible (but set dpi high in Windows if not)
k <-ggplot() + 
  geom_pointrange(data=d, mapping=aes(x=d$longnames,y=d$coefs, ymin=d$coef.min, ymax=d$coef.max), position="identity", width=0.5, size=1, color="black") + 
theme_bw() +
  # Set the entire chart region to a light gray color
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_blank(), axis.line=element_line(), axis.line.y=element_blank()) +
  # Format the grid
  theme(panel.grid.major=element_line(colour=color.background,size=.75)) +
  theme(axis.ticks=element_blank()) +
  theme(axis.line.x =element_line(colour="#535353", size=.75))+
  # Dispose of the legend
  theme(legend.position="none") +
  # Set title and axis labels, and format these and tick marks
  #ggtitle("Regression Estimates") + 
  theme(plot.title = element_text(size = 20, hjust=-.8, vjust=2.6, face="bold"))+
  xlab("") +
  ylab("Percentage change in income") +
  theme(axis.text.x=element_text(size=11,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=11,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=11,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=11,colour="#535353",face="bold",vjust=-.5)) +
  # Big bold line at y=0
  geom_hline(yintercept=0,size=1.2, alpha=0.2,colour="#535353", linetype="twodash")+
  # Plot margins 
  theme(plot.margin = unit(c(1, 1, .5, .7), "cm"))
  k + coord_flip()   
  ggsave("coefplot_model6.png", dpi=1000, width=10, height=10)
  
##--------------------------------------------------------- PREDICTION ---------------------------------------------------------##  
bcsImputed <-complete(imp, 3)
bcsImputedSubset <- bcsImputed[ which(bcsImputed$working=='1' & bcsImputed$alevel=="1"), ]
fit1<-(logsalary34 ~ I(bd7sex=="Female") + married + children + parttime +  I(residence=="East Midlands") 
       + I(residence=="East of England") + I(residence=="North East") + I(residence=="North West") 
       + I(residence=="South East") + I(residence=="South West") + I(residence=="West Midlands") 
       + I(residence=="Yorkshire and The Humber") + I(residence=="Scotland")+ I(residence=="Wales")
       + I(sc34=="II Managerial-technical")+ I(sc34=="3.1 Skilled Non-Manual") + I(sc34=="3.2 Skilled Manual")
       + I(sc34=="IV Partly Skilled") + I(sc34=="V Unskilled") + I(sc34=="Others")+ degree + NVQ + prof 
       + highereddip + mathematics + science + humanities + socialscience + other + z_workexp + z_workexpsq
       + z_tenure + z_unemp + z_b10maths + z_b10read, subset=(working=="1"& alevel=="1"), data=bcsImputed)) 
display(fit1)
attach(bcsImputedSubset)

# Gender comparison: married, has children, works full time, resides in London, works in a professional job, has a degree, has mathematics A level, average work experience,
#and average maths and reading scores.
set.seed(1314)
beta.hat <- tab5
male5.est <-(beta.hat[1] + beta.hat[2]*0 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
             + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
             + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
             + beta.hat[26]*1 + beta.hat[27]*0 + beta.hat[28]*0 + beta.hat[29]*0 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
             + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0)
male5.sd <-0.60
male5.pred_log <-rnorm (1000,  male5.est, male5.sd)
male5.pred <-exp(rnorm (1000,  male5.est, male5.sd))
mean.male5.pred.log<-mean(male5.pred_log)
mean.male5.pred.log.lower<-quantile(male5.pred_log,c(.025))
mean.male5.pred.log.upper<-quantile(male5.pred_log,c(.975))
mean.male5.pred<-mean(male5.pred)
mean.male5.pred.lower<-quantile(male5.pred,c(.025))
mean.male5.pred.upper<-quantile(male5.pred,c(.975))

set.seed(1314)
beta.hat <- tab5
female5.est <- (beta.hat[1] + beta.hat[2]*1 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
                + beta.hat[26]*1 + beta.hat[27]*0 + beta.hat[28]*0 + beta.hat[29]*0 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0)
female5.sd <-0.60
female5.pred_log <-rnorm (1000,  female5.est, female5.sd)
female5.pred <-exp(rnorm (1000,  female5.est, female5.sd))
mean.female5.pred.log<-mean(female5.pred_log)
mean.female5.pred.log.lower<-quantile(female5.pred_log,c(.025))
mean.female5.pred.log.upper<-quantile(female5.pred_log,c(.975))
mean.female5.pred<-mean(female5.pred)
mean.female5.pred.lower<-quantile(female5.pred,c(.025))
mean.female5.pred.upper<-quantile(female5.pred,c(.975))

pred5.diff.gender <- male5.pred - female5.pred
mean.pred5.diff.gender <-mean(pred5.diff.gender)
mean.diff5.gender.pred.lower<-quantile(pred5.diff.gender ,c(.025))
mean.diff5.gender.pred.upper<-quantile(pred5.diff.gender ,c(.975))
pred5.ratio.gender <-male5.pred / female5.pred
mean.ratio5.gender<-mean(pred5.ratio.gender)
mean.ratio5.gender.pred.lower<-quantile(pred5.ratio.gender,c(.025))
mean.ratio5.gender.pred.upper<-quantile(pred5.ratio.gender, c(.975))


fit2 <-lm(logsalary34 ~ I(bd7sex=="Female") + married + children + parttime +  I(residence=="East Midlands") 
          + I(residence=="East of England") + I(residence=="North East") + I(residence=="North West") 
          + I(residence=="South East") + I(residence=="South West") + I(residence=="West Midlands") 
          + I(residence=="Yorkshire and The Humber") + I(residence=="Scotland")+ I(residence=="Wales")
          + I(parentsc=="II Managerial-technical")+ I(parentsc=="3.1 Skilled Non-Manual") + I(parentsc=="3.2 Skilled Manual")
          + I(parentsc=="IV Partly Skilled") + I(parentsc=="V Unskilled")+ degree + NVQ + prof 
          + highereddip + mathematics + science + humanities + socialscience + other + z_workexp + z_workexpsq
          + z_tenure + z_unemp + z_b10maths + z_b10read + z_maths_olevel + z_english_olevel + z_mother_edfinish 
          + z_father_edfinish + mother_employment + school + schoolmix + post18education + profocc_asp, data=bcsImputed, subset=(working=="1"& alevel=="1") ) 
display(fit2)
attach(bcsImputedSubset)
set.seed(1314)
beta.hat <- tab6 
male6.est <-(beta.hat[1] + beta.hat[2]*0 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
             + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
             + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*1 
             + beta.hat[26]*0 + beta.hat[27]*0 + beta.hat[28]*0 + beta.hat[29]*0 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
             + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0 + beta.hat[37]*0 + beta.hat[38]*0 + beta.hat[39]*0 + beta.hat[40]*0 + beta.hat[41]*0
             + beta.hat[42]*0 + beta.hat[43]*0 + beta.hat[44]*0)
male6.sd <-0.60
male6.pred_log <-rnorm (1000,  male6.est, male6.sd)
male6.pred <-exp(rnorm (1000,  male6.est, male6.sd))
mean.male6.pred.log<-mean(male6.pred_log)
mean.male6.pred.log.lower<-quantile(male6.pred_log,c(.025))
mean.male6.pred.log.upper<-quantile(male6.pred_log,c(.975))
mean.male6.pred<-mean(male6.pred)
mean.male6.pred.lower<-quantile(male6.pred,c(.025))
mean.male6.pred.upper<-quantile(male6.pred,c(.975))


beta.hat <- tab6 
female6.est <- (beta.hat[1] + beta.hat[2]*1 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*1 
                + beta.hat[26]*0 + beta.hat[27]*0 + beta.hat[28]*0 + beta.hat[29]*0 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0 + beta.hat[37]*0 + beta.hat[38]*0 + beta.hat[39]*0 + beta.hat[40]*0 + beta.hat[41]*0
                + beta.hat[42]*0 + beta.hat[43]*0 + beta.hat[44]*0)
female6.sd <-0.60
female6.pred_log <-rnorm (1000,  female6.est, female6.sd)
female6.pred <-exp(rnorm (1000,  female6.est, female6.sd))
mean.female6.pred.log<-mean(female6.pred_log)
mean.female6.pred.log.lower<-quantile(female6.pred_log,c(.025))
mean.female6.pred.log.upper<-quantile(female6.pred_log,c(.975))
mean.female6.pred<-mean(female6.pred)
mean.female6.pred.lower<-quantile(female6.pred,c(.025))
mean.female6.pred.upper<-quantile(female6.pred,c(.975))

pred6.diff.gender <- male6.pred - female6.pred
mean.pred6.diff.gender <-mean(pred6.diff.gender)
mean.diff6.gender.pred.lower<-quantile(pred6.diff.gender ,c(.025))
mean.diff6.gender.pred.upper<-quantile(pred6.diff.gender ,c(.975))
pred6.ratio.gender <-male6.pred / female6.pred
mean.ratio6.gender<-mean(pred6.ratio.gender)
mean.ratio6.gender.pred.lower<-quantile(pred6.ratio.gender ,c(.025))
mean.ratio6.gender.pred.upper<-quantile(pred6.ratio.gender ,c(.975))

#--------------------------------------------------------- Maths A level comparison Model 5---------------------------------------------------------
set.seed(1314)
beta.hat <- tab5
male.maths5.est <- (beta.hat[1] + beta.hat[2]*0 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                   + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                   + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
                   + beta.hat[26]*1 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                   + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0)
male.maths5.sd <-0.60
male.maths5.pred_log <-rnorm (1000,  male.maths5.est, male.maths5.sd)
male.maths5.pred <-exp(rnorm (1000,  male.maths5.est, male.maths5.sd))
mean.male.maths5.pred.log<-mean(male.maths5.pred_log)
mean.male.maths5.pred.log.lower<-quantile(male.maths5.pred_log,c(.025))
mean.male.maths5.pred.log.upper<-quantile(male.maths5.pred_log,c(.975))
mean.male5.pred<-mean(male.maths5.pred)
mean.male.maths5.pred.lower<-quantile(male.maths5.pred ,c(.025))
mean.male.maths5.pred.upper<-quantile(male.maths5.pred ,c(.975))

beta.hat <- tab5
male.nomaths5.est <- (beta.hat[1] + beta.hat[2]*0 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                     + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                     + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
                     + beta.hat[26]*0 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                     + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0)
male.nomaths5.sd <-0.61
male.nomaths5.pred_log <-rnorm (1000,  male.nomaths5.est, male.nomaths5.sd)
male.nomaths5.pred <-exp(rnorm (1000,  male.nomaths5.est, male.nomaths5.sd))
mean.male.nomaths5.pred.log<-mean(male.nomaths5.pred_log)
mean.male.nomaths5.pred.log.lower<-quantile(male.nomaths5.pred_log,c(.025))
mean.male.nomaths5.pred.log.upper<-quantile(male.nomaths5.pred_log,c(.975))
mean.nomaths5.pred<-mean(male.nomaths5.pred)
mean.male.nomaths5.pred.lower<-quantile(male.nomaths5.pred,c(.025))
mean.male.nomaths5.pred.upper<-quantile(male.nomaths5.pred,c(.975))
male.maths5.nomaths.pred.diff <- male.maths5.pred - male.nomaths5.pred
mean.male.maths5.nomaths.pred.diff <-mean(male.maths5.nomaths.pred.diff)
mean.male.maths5.nomaths.pred.diff.lower<-quantile(male.maths5.nomaths.pred.diff ,c(.025))
mean.male.maths5.nomaths.pred.diff.upper<-quantile(male.maths5.nomaths.pred.diff ,c(.975))
male.maths5.nomaths.pred.ratio <-male.maths5.pred  / male.nomaths5.pred
mean.male.maths5.nomaths.pred.ratio.lower<-quantile(male.maths5.nomaths.pred.ratio ,c(.025))
mean.male.maths5.nomaths.pred.ratio.upper<-quantile(male.maths5.nomaths.pred.ratio ,c(.975))

beta.hat <- tab5
female.maths5.est <- (beta.hat[1] + beta.hat[2]*1 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                   + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                   + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
                   + beta.hat[26]*1 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                   + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0)
female.maths5.sd <-0.60
female.maths5.pred_log <-rnorm (1000,  female.maths5.est, female.maths5.sd)
female.maths5.pred <-exp(rnorm (1000,  female.maths5.est, female.maths5.sd))
mean.female.maths5.pred.log<-mean(female.maths5.pred_log)
mean.female.maths5.pred.log.lower<-quantile(female.maths5.pred_log,c(.025))
mean.female.maths5.pred.log.upper<-quantile(male.maths5.pred_log,c(.975))
mean.female.maths5.pred<-mean(male.maths5.pred)
mean.female.maths5.pred.lower<-quantile(female.maths5.pred ,c(.025))
mean.female.maths5.pred.upper<-quantile(female.maths5.pred ,c(.975))

beta.hat <- tab5
female.nomaths5.est <- (beta.hat[1] + beta.hat[2]*1 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                     + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                     + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
                     + beta.hat[26]*0 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                     + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0)
female.nomaths5.sd <-0.60
female.nomaths5.pred_log <-rnorm (1000,  female.nomaths5.est, female.nomaths5.sd)
female.nomaths5.pred <-exp(rnorm (1000,  female.nomaths5.est, female.nomaths5.sd))
mean.female.nomaths5.pred.log<-mean(female.nomaths5.pred_log)
mean.female.nomaths5.pred.log.lower<-quantile(female.nomaths5.pred_log,c(.025))
mean.female.nomaths5.pred.log.upper<-quantile(female.nomaths5.pred_log,c(.975))
mean.female.nomaths5.pred<-mean(female.nomaths5.pred)
mean.female.nomaths5.pred.lower<-quantile(female.nomaths5.pred,c(.025))
mean.female.nomaths5.pred.upper<-quantile(female.nomaths5.pred,c(.975))
female.maths5.nomaths.pred.diff <- female.maths5.pred - female.nomaths5.pred
mean.female.maths5.nomaths.pred.diff <-mean(female.maths5.nomaths.pred.diff)
mean.female.maths5.nomaths.pred.diff.lower<-quantile(female.maths5.nomaths.pred.diff  ,c(.025))
mean.female.maths5.nomaths.pred.diff.upper<-quantile(female.maths5.nomaths.pred.diff  ,c(.975))
female.maths5.nomaths.pred.ratio <-female.maths5.pred  / female.nomaths5.pred
mean.female.maths5.nomaths.pred.ratio.lower<-quantile(female.maths5.nomaths.pred.ratio,c(.025))
mean.female.maths5.nomaths.pred.ratio.upper<-quantile(female.maths5.nomaths.pred.ratio,c(.975))


#--------------------------------------------------------- Maths A level comparison Model 6---------------------------------------------------------
set.seed(1314)
beta.hat <- tab6
male.maths6.est <- (beta.hat[1] + beta.hat[2]*0 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                   + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                   + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*1 
                   + beta.hat[26]*1 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                   + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0 + beta.hat[37]*0 + beta.hat[38]*0 + beta.hat[39]*0 + beta.hat[40]*0 + beta.hat[41]*0
                   + beta.hat[42]*0 + beta.hat[43]*0 + beta.hat[44]*0)
male.maths6.sd <-0.60
male.maths6.pred_log <-rnorm (1000,  male.maths6.est, male.maths6.sd)
male.maths6.pred <-exp(rnorm (1000,  male.maths6.est, male.maths6.sd))
mean.male.maths6.pred.log<-mean(male.maths6.pred_log)
mean.male.maths6.pred.log.lower<-quantile(male.maths6.pred_log,c(.025))
mean.male.maths6.pred.log.upper<-quantile(male.maths6.pred_log,c(.975))
mean.male6.pred<-mean(male.maths6.pred)
mean.male.maths6.pred.lower<-quantile(male.maths6.pred ,c(.025))
mean.male.maths6.pred.upper<-quantile(male.maths6.pred ,c(.975))

beta.hat <- tab6
male.nomaths6.est <- (beta.hat[1] + beta.hat[2]*0 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                      + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                      + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
                      + beta.hat[26]*1 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                      + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0 + beta.hat[37]*0 + beta.hat[38]*0 + beta.hat[39]*0 + beta.hat[40]*0 + beta.hat[41]*0
                      + beta.hat[42]*0 + beta.hat[43]*0 + beta.hat[44]*0)
male.nomaths6.sd <-0.60
male.nomaths6.pred_log <-rnorm (1000,  male.nomaths6.est, male.nomaths6.sd)
male.nomaths6.pred <-exp(rnorm (1000,  male.nomaths6.est, male.nomaths6.sd))
mean.male.nomaths6.pred.log<-mean(male.nomaths6.pred_log)
mean.male.nomaths6.pred.log.lower<-quantile(male.nomaths6.pred_log,c(.025))
mean.male.nomaths6.pred.log.upper<-quantile(male.nomaths6.pred_log,c(.975))
mean.nomaths6.pred<-mean(male.nomaths6.pred)
mean.male.nomaths6.pred.lower<-quantile(male.nomaths6.pred,c(.025))
mean.male.nomaths6.pred.upper<-quantile(male.nomaths6.pred,c(.975))
male.maths6.nomaths.pred.diff <- male.maths6.pred - male.nomaths6.pred
mean.male.maths6.nomaths.pred.diff<-mean(male.maths6.nomaths.pred.diff)
mean.male.maths6.nomaths.pred.diff.lower<-quantile(male.maths6.nomaths.pred.diff ,c(.025))
mean.male.maths6.nomaths.pred.diff.upper<-quantile(male.maths6.nomaths.pred.diff ,c(.975))
male.maths6.nomaths.pred.ratio <-male.maths6.pred  / male.nomaths6.pred
mean.male.maths6.nomaths.pred.ratio<-mean(male.maths6.nomaths.pred.ratio)
mean.male.maths6.nomaths.pred.ratio.lower<-quantile(male.maths6.nomaths.pred.ratio ,c(.025))
mean.male.maths6.nomaths.pred.ratio.upper<-quantile(male.maths6.nomaths.pred.ratio ,c(.975))



beta.hat <- tab6
female.maths6.est <- (beta.hat[1] + beta.hat[2]*1 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                   + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                   + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*1 
                   + beta.hat[26]*1 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                   + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0 + beta.hat[37]*0 + beta.hat[38]*0 + beta.hat[39]*0 + beta.hat[40]*0 + beta.hat[41]*0
                   + beta.hat[42]*0 + beta.hat[43]*0 + beta.hat[44]*0)
female.maths6.sd <-0.60
female.maths6.pred_log <-rnorm (1000,  female.maths6.est, female.maths6.sd)
female.maths6.pred <-exp(rnorm (1000,  female.maths6.est, female.maths6.sd))
mean.female.maths6.pred.log<-mean(male.maths6.pred_log)
mean.female.maths6.pred.log.lower<-quantile(female.maths6.pred_log,c(.025))
mean.female.maths6.pred.log.upper<-quantile(female.maths6.pred_log,c(.975))
mean.female6.pred<-mean(female.maths6.pred)
mean.female.maths6.pred.lower<-quantile(female.maths6.pred ,c(.025))
mean.female.maths6.pred.upper<-quantile(female.maths6.pred ,c(.975))


beta.hat <- tab6
female.nomaths6.est <- (beta.hat[1] + beta.hat[2]*1 + beta.hat[3]*1 + beta.hat[4]*1 + beta.hat[5]*0 + beta.hat[6]*0 + beta.hat[7]*0 + beta.hat[8]*0 + beta.hat[9]*0
                        + beta.hat[10]*0 + beta.hat[11]*0 + beta.hat[12]*0 + beta.hat[13]*0 + beta.hat[14]*0 + beta.hat[15]*0 + beta.hat[16]*0 + beta.hat[17]*0 
                        + beta.hat[18]*0 + beta.hat[19]*0 + beta.hat[20]*0 + beta.hat[21]*0 + beta.hat[22]*1 + beta.hat[23]*0 + beta.hat[24]*0 + beta.hat[25]*0 
                        + beta.hat[26]*1 + beta.hat[27]*1 + beta.hat[28]*1 + beta.hat[29]*1 + beta.hat[30]*0 + beta.hat[31]*0 + beta.hat[32]*0 + beta.hat[33]*0 
                        + beta.hat[34]*0 + beta.hat[35]*0 + beta.hat[36]*0 + beta.hat[37]*0 + beta.hat[38]*0 + beta.hat[39]*0 + beta.hat[40]*0 + beta.hat[41]*0
                        + beta.hat[42]*0 + beta.hat[43]*0 + beta.hat[44]*0)
female.nomaths6.sd <-0.60
female.nomaths6.pred_log <-rnorm (1000, female.nomaths6.est, female.nomaths6.sd)
female.nomaths6.pred <-exp(rnorm (1000, female.nomaths6.est, female.nomaths6.sd))
mean.female.nomaths6.pred.log<-mean(female.nomaths6.pred_log)
mean.female.nomaths6.pred.log.lower<-quantile(female.nomaths6.pred_log,c(.025))
mean.female.nomaths6.pred.log.upper<-quantile(female.nomaths6.pred_log,c(.975))
mean.nomaths6.pred<-mean(female.nomaths6.pred)
mean.female.nomaths6.pred.lower<-quantile(female.nomaths6.pred,c(.025))
mean.female.nomaths6.pred.upper<-quantile(female.nomaths6.pred,c(.975))
female.maths6.nomaths.pred.diff <- female.maths6.pred - female.nomaths6.pred
mean.female.maths6.nomaths.pred.diff <-mean(female.maths6.nomaths.pred.diff)
mean.female.maths6.nomaths.pred.diff.lower<-quantile(female.maths6.nomaths.pred.diff ,c(.025))
mean.female.maths6.nomaths.pred.diff.upper<-quantile(female.maths6.nomaths.pred.diff ,c(.975))
female.maths6.nomaths.pred.ratio <-female.maths6.pred  / female.nomaths6.pred
mean.female.maths6.nomaths.pred.ratio <-mean(female.maths6.nomaths.pred.ratio)
mean.female.maths6.nomaths.pred.ratio.lower<-quantile(female.maths6.nomaths.pred.ratio ,c(.025))
mean.female.maths6.nomaths.pred.ratio.upper<-quantile(female.maths6.nomaths.pred.ratio ,c(.975))



# Prediction plots - Scenario 1
options("scipen"=10) 
d=data.frame(pred5.diff.gender,pred6.diff.gender, male.maths5.nomaths.pred.diff, male.maths6.nomaths.pred.diff, female.maths5.nomaths.pred.diff,female.maths6.nomaths.pred.diff) 
p1 <-ggplot(d, aes(x=pred5.diff.gender)) +
  geom_histogram(binwidth=5000, colour="black", fill="grey")+
  # Begin construction of chart
  theme_bw(base_size=9) +
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_rect(color=color.background)) +
  # Format the grid
  theme(panel.grid.major=element_line(color=color.grid.major,size=.25)) +
  theme(panel.grid.minor=element_blank()) +
  theme(axis.ticks=element_blank()) +
  # Format the legend, but hide by default
  theme(legend.position="none") +
  theme(legend.background = element_rect(fill=color.background)) +
  theme(legend.text = element_text(size=7,color=color.axis.title)) +
  # Set title and axis labels, and format these and tick marks
  ggtitle("Scenario 1: Predicted difference in earnings (Model 5)")+ 
  theme(plot.title=element_text(color=color.title, size=15, face="bold", vjust=1.25)) +
  xlab("Income") +
  ylab("Frequency") +
  theme(axis.text.x=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=13,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=13,colour="#535353",face="bold",vjust=-.5)) +
  geom_vline(xintercept=mean(pred5.diff.gender),size=1.2, alpha=.3, colour="red", linetype="twodash")+
  geom_vline(xintercept=quantile(pred5.diff.gender ,c(.025)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  geom_vline(xintercept=quantile(pred5.diff.gender ,c(.975)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  # Plot margins
  theme(plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm"))
  
p2 <-ggplot(d, aes(x=pred6.diff.gender)) +
  geom_histogram(binwidth=5000, colour="black", fill="grey")+
    # Begin construction of chart
  theme_bw(base_size=9) +
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_rect(color=color.background)) +
  # Format the grid
  theme(panel.grid.major=element_line(color=color.grid.major,size=.25)) +
  theme(panel.grid.minor=element_blank()) +
  theme(axis.ticks=element_blank()) +
  # Format the legend, but hide by default
  theme(legend.position="none") +
  theme(legend.background = element_rect(fill=color.background)) +
  theme(legend.text = element_text(size=7,color=color.axis.title)) +
  # Set title and axis labels, and format these and tick marks
  ggtitle("Scenario 1: Predicted difference in earnings (Model 6)")+ 
  theme(plot.title=element_text(color=color.title, size=15, face="bold", vjust=1.25)) +
  xlab("Income") +
  ylab("Frequency") +
  theme(axis.text.x=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=13,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=13,colour="#535353",face="bold",vjust=-.5)) +
  geom_vline(xintercept=mean(pred6.diff.gender),size=1.2, alpha=.3, colour="red", linetype="twodash")+
  geom_vline(xintercept=quantile(pred6.diff.gender ,c(.025)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  geom_vline(xintercept=quantile(pred6.diff.gender ,c(.975)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  # Plot margins
  theme(plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm"))
  
 p3 <-ggplot(d, aes(x=male.maths5.nomaths.pred.diff)) +
  geom_histogram(binwidth=5000, colour="black", fill="grey")+
    # Begin construction of chart
  theme_bw(base_size=9) +
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_rect(color=color.background)) +
  # Format the grid
  theme(panel.grid.major=element_line(color=color.grid.major,size=.25)) +
  theme(panel.grid.minor=element_blank()) +
  theme(axis.ticks=element_blank()) +
  # Format the legend, but hide by default
  theme(legend.position="none") +
  theme(legend.background = element_rect(fill=color.background)) +
  theme(legend.text = element_text(size=7,color=color.axis.title)) +
  # Set title and axis labels, and format these and tick marks
  ggtitle("Scenario 2: Predicted difference in earnings Male Maths vs. No Maths (Model 5)")+ 
  theme(plot.title=element_text(color=color.title, size=15, face="bold", vjust=1.25)) +
  xlab("Income") +
  ylab("Frequency") +
  theme(axis.text.x=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=13,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=13,colour="#535353",face="bold",vjust=-.5)) +
  geom_vline(xintercept=mean(male.maths5.nomaths.pred.diff),size=1.2, alpha=.3, colour="red", linetype="twodash")+
  geom_vline(xintercept=quantile(male.maths5.nomaths.pred.diff ,c(.025)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  geom_vline(xintercept=quantile(male.maths5.nomaths.pred.diff ,c(.975)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  # Plot margins
  theme(plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm"))
  
 p4 <-ggplot(d, aes(x=male.maths6.nomaths.pred.diff)) +
  geom_histogram(binwidth=5000, colour="black", fill="grey")+
    # Begin construction of chart
  theme_bw(base_size=9) +
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_rect(color=color.background)) +
  # Format the grid
  theme(panel.grid.major=element_line(color=color.grid.major,size=.25)) +
  theme(panel.grid.minor=element_blank()) +
  theme(axis.ticks=element_blank()) +
  # Format the legend, but hide by default
  theme(legend.position="none") +
  theme(legend.background = element_rect(fill=color.background)) +
  theme(legend.text = element_text(size=7,color=color.axis.title)) +
  # Set title and axis labels, and format these and tick marks
  ggtitle("Scenario 2: Predicted difference in earnings Male Maths vs. No Maths (Model 6)")+ 
  theme(plot.title=element_text(color=color.title, size=15, face="bold", vjust=1.25)) +
  xlab("Income") +
  ylab("Frequency") +
  theme(axis.text.x=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=13,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=13,colour="#535353",face="bold",vjust=-.5)) +
  geom_vline(xintercept=mean(male.maths6.nomaths.pred.diff),size=1.2, alpha=.3, colour="red", linetype="twodash")+
  geom_vline(xintercept=quantile(male.maths6.nomaths.pred.diff ,c(.025)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  geom_vline(xintercept=quantile(male.maths6.nomaths.pred.diff ,c(.975)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  # Plot margins
  theme(plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm"))
  
 p5 <-ggplot(d, aes(x=female.maths5.nomaths.pred.diff)) +
  geom_histogram(binwidth=5000, colour="black", fill="grey")+
    # Begin construction of chart
  theme_bw(base_size=9) +
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_rect(color=color.background)) +
  # Format the grid
  theme(panel.grid.major=element_line(color=color.grid.major,size=.25)) +
  theme(panel.grid.minor=element_blank()) +
  theme(axis.ticks=element_blank()) +
  # Format the legend, but hide by default
  theme(legend.position="none") +
  theme(legend.background = element_rect(fill=color.background)) +
  theme(legend.text = element_text(size=7,color=color.axis.title)) +
  # Set title and axis labels, and format these and tick marks
  ggtitle("Scenario 2: Predicted difference in earnings Female Maths vs. No Maths (Model 5)")+ 
  theme(plot.title=element_text(color=color.title, size=15, face="bold", vjust=1.25)) +
  xlab("Income") +
  ylab("Frequency") +
  theme(axis.text.x=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=13,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=13,colour="#535353",face="bold",vjust=-.5)) +
  geom_vline(xintercept=mean(female.maths5.nomaths.pred.diff),size=1.2, alpha=.3, colour="red", linetype="twodash")+
  geom_vline(xintercept=quantile(female.maths5.nomaths.pred.diff ,c(.025)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  geom_vline(xintercept=quantile(female.maths5.nomaths.pred.diff ,c(.975)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  # Plot margins
  theme(plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm"))  
  
 p6 <-ggplot(d, aes(x=female.maths6.nomaths.pred.diff)) +
  geom_histogram(binwidth=5000, colour="black", fill="grey")+
    # Begin construction of chart
  theme_bw(base_size=9) +
  theme(panel.background=element_rect(fill=color.background, color=color.background)) +
  theme(plot.background=element_rect(fill=color.background, color=color.background)) +
  theme(panel.border=element_rect(color=color.background)) +
  # Format the grid
  theme(panel.grid.major=element_line(color=color.grid.major,size=.25)) +
  theme(panel.grid.minor=element_blank()) +
  theme(axis.ticks=element_blank()) +
  # Format the legend, but hide by default
  theme(legend.position="none") +
  theme(legend.background = element_rect(fill=color.background)) +
  theme(legend.text = element_text(size=7,color=color.axis.title)) +
  # Set title and axis labels, and format these and tick marks
  ggtitle("Scenario 2: Predicted difference in earnings Female Maths vs. No Maths (Model 6)")+ 
  theme(plot.title=element_text(color=color.title, size=15, face="bold", vjust=1.25)) +
  xlab("Income") +
  ylab("Frequency") +
  theme(axis.text.x=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.text.y=element_text(size=13,colour="#535353",face="bold")) +
  theme(axis.title.y=element_text(size=13,colour="#535353",face="bold",vjust=1.5)) +
  theme(axis.title.x=element_text(size=13,colour="#535353",face="bold",vjust=-.5)) +
  geom_vline(xintercept=mean(female.maths6.nomaths.pred.diff),size=1.2, alpha=.3, colour="red", linetype="twodash")+
  geom_vline(xintercept=quantile(female.maths6.nomaths.pred.diff ,c(.025)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  geom_vline(xintercept=quantile(female.maths6.nomaths.pred.diff ,c(.975)),size=1.2, alpha=.3, colour="blue", linetype="twodash")+
  # Plot margins
  theme(plot.margin = unit(c(0.35, 0.2, 0.3, 0.35), "cm"))    

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
png ("BCS_prediction.png", width=60, height=48, units = "cm", res=600) 
multiplot(p1,p3,p4,p2,p5,p6, cols=2)
dev.off()